1 Load Data

heterocyst_data <- read_excel(file.path("data", "2018_Silverman_et_al-heterocyst_data.xlsx"))

2 Heterocyst data

2.1 SI plot with all exponential phase data points

heterocyst_data %>% 
  # select one growth phase only to keep it interpretable 
  filter(growth_phase == "exponential") %>% 
  # define global plot aesthetics
  ggplot(aes(x = pN2, fill = factor(pN2))) + 
  # individual data points
  geom_jitter(map = aes(y = n_cbh, shape = factor(experiment)), alpha = 0.3, width = 0.02, color = "black") + 
  # box plot
  geom_boxplot(map = aes(y = n_cbh), alpha = 0.5, color = "black", width=0.1, outlier.alpha = 0, coef = 0) + 
  # averages
  geom_point(data = function(df) group_by(df, organism, pN2) %>% summarize(n_cbh.avg = mean(n_cbh)), 
             map = aes(y = n_cbh.avg), size = 4, shape = 21, color = "black", fill = "white") + 
  # panels
  facet_wrap(~organism, ncol=2, scales = "free_y") + 
  # scales
  scale_fill_manual(values = cbPalette) +
  scale_color_manual(values = cbPalette) +
  scale_shape_manual(values = 21:25) +
  scale_x_continuous(expand = c(0,0), breaks = (0:5)*0.2) + 
  coord_cartesian(xlim = c(-0.1, 0.9)) +
  # labels
  labs(x = TeX("$pN_{2}$ \\[bar\\]"), y = "distance (# cells)") +
  # theme
  theme_figure(grid = FALSE, legend = FALSE, text_size = 20) 

2.2 SI comparison plot of density distributions across different growth conditions

heterocyst_data %>% 
  # focus on cylindrica
  filter(organism == "A. cylindrica") %>% 
  # add formatted pN2 label for paneling
  mutate(pN2_formatted = sprintf("%0.1f bar", pN2)) %>% 
  # define global plot aesthetics
  ggplot(aes(x = n_cbh, y = ..density.., color = growth_phase, group = growth_phase)) +
  # density curves
  geom_density() +
  # vertical lines for means
  geom_vline(data = function(df) df %>% group_by(growth_phase, pN2_formatted) %>% 
               summarize(n_cbh_mean = mean(n_cbh)),
             map = aes(xintercept = n_cbh_mean, color = growth_phase), linetype = 2) +
  # vertical lines for medians
  geom_vline(data = function(df) df %>% group_by(growth_phase, pN2_formatted) %>% 
               summarize(n_cbh_median = median(n_cbh)),
             map = aes(xintercept = n_cbh_median, color = growth_phase), linetype = 3) +
  # scales
  scale_x_continuous(expand = c(0,0), lim = c(0,40)) +
  scale_color_manual(values = cbPalette) +
  # labels
  labs(x = "distance (# cells)", y = "density", color = "Growth phase", fill = "Growth phase") + 
  # grid
  facet_grid(pN2_formatted~.) +
  # theme
  theme_figure(grid = FALSE, legend = TRUE, text_size = 20) 

3 Bootstrapped heterocyst data

3.1 Bootstrap means and medians for the heterocyst data

# bootstrap heterocyst data
n_bootstrap <- 1000
heterocyst_data_summary <- 
  heterocyst_data %>% 
  nest(-organism, -pN2, -growth_phase) %>% 
  mutate(
    # number of samples
    n_samples = map_int(data, nrow),
    # bootstrap means and medians
    bootstrap = map(data, ~boot(data = .x$n_cbh, R = n_bootstrap,
        statistic = function(x, idx) c(mean(x[idx]), median(x[idx])))),
    # resulting bootstrapped means and standard errors
    n_cbh_mean = map_dbl(bootstrap, ~.x$t0[1]),
    n_cbh_mean_se = map_dbl(bootstrap, ~sd(.x$t[,1])),
    n_cbh_median = map_dbl(bootstrap, ~.x$t0[2]),
    n_cbh_median_se = map_dbl(bootstrap, ~sd(.x$t[,2]))
  ) %>% 
  arrange(organism, growth_phase, pN2)

# safe in data for other analysis scripts
heterocyst_data_summary %>% 
  select_if(~!is.list(.x)) %>% 
  write.xlsx(file.path("data", "2018_Silverman_et_al-heterocyst_data_summary.xlsx"))

# export as data table for SI
heterocyst_data_table <-
  heterocyst_data_summary %>% 
  mutate(
    pN2 = table_round(pN2, n_decs = 1),
    n_samples = table_round(n_samples, n_decs = 0),
    n_cbh_mean = table_format_with_err(n_cbh_mean, n_cbh_mean_se, sig = 2, max = 2),
    n_cbh_median = table_format_with_err(n_cbh_median, n_cbh_median_se, sig = 2, max = 2)
  ) %>% 
  select(organism, growth_phase, pN2, n_samples, n_cbh_mean, n_cbh_median)
heterocyst_data_table %>% export_data_table("heterocyst_numbers.xlsx")

# show data
heterocyst_data_table 

3.2 Evaluate differences between pN2 conditions

Evalulate significant differences in the exponential growth phase based on the bootstrapped means and directly using Wilcox Mann Whitney.

Result: both measures provide identical significance levels.

heterocyst_differences <- 
  heterocyst_data_summary %>% 
  filter(growth_phase == "exponential") %>% 
  select(organism, pN2, data, bootstrap) %>% 
  { full_join(rename(., pN2_a = pN2, data_a = data, bs_a = bootstrap),
              rename(., pN2_b = pN2, data_b = data, bs_b = bootstrap), by = "organism") } %>% 
  filter(pN2_a != pN2_b) %>% 
  mutate(
    # boostrapped difference in mean and standard error of the difference
    bs_diff = map2(bs_a, bs_b, function(a, b) a$t[,1] - b$t[,1]),
    n_cbh_mean_diff = map_dbl(bs_diff, ~mean(.x)),
    n_cbh_mean_diff_se = map_dbl(bs_diff, ~sd(.x)),
    # is this difference in mean statistically significant?
    # evaluate by calculating the p-value for H0: bootstrapped difference in means == 0 
    # --> first calcualte the t statistic for H0, then evaluate the p value based on the t distribution
    t_pval = ((n_cbh_mean_diff - 0)/n_cbh_mean_diff_se) %>% { 2*pt(-abs(.), df = length(bs_diff) - 1) },
    t_signif = t_pval %>% { ifelse( . < 0.001, "***", ifelse(. < 0.01, "**", ifelse(. < 0.05, "*", "-"))) },
    # alternatively: evaluate difference in original distribution using Wilcox Mann Whitney test
    wilcox_pval = map2_dbl(data_a, data_b, function(a, b) wilcox.test(a$n_cbh, b$n_cbh)$p.value),
    wilcox_signif = wilcox_pval %>% { ifelse( . < 0.001, "***", ifelse(. < 0.01, "**", ifelse(. < 0.05, "*", "-"))) }
  ) %>% 
  select(-starts_with("data"), -bs_diff, -bs_a, -bs_b) %>% 
  arrange(organism, pN2_a, pN2_b)

# list significance levels for both bootstrapped and WMW test
heterocyst_differences %>% 
  select(organism, pN2_a, pN2_b, t_signif, wilcox_signif) 
# export as data table for SI
heterocyst_differences_table <-
  heterocyst_differences %>% 
   mutate(
    pN2_a = table_round(pN2_a, n_decs = 1),
    pN2_b = table_round(pN2_b, n_decs = 1),
    value = table_format_with_err(n_cbh_mean_diff, n_cbh_mean_diff_se, sig = 2, max = 2) %>% paste0(" (", t_signif, ")")
  ) %>% 
  select(organism, pN2_a, pN2_b, value) %>% 
  spread(pN2_a, value) %>% rename(` ` = pN2_b)  
heterocyst_differences_table %>% export_data_table("heterocyst_comparison.xlsx")

# show data
heterocyst_differences_table

3.3 Main plot visualizing means and standard deviations

heterocyst_data_summary %>% 
  ggplot(aes(x = pN2, shape = growth_phase)) + 
  # bootstrap standard errors (1 sigma)
  geom_errorbar(map = aes(ymin = n_cbh_mean - n_cbh_mean_se, ymax = n_cbh_mean + n_cbh_mean_se), color = "black", width = 0.05, size = 1) + 
  # means
  geom_point(map = aes(y = n_cbh_mean), size = 5, color = "black", fill = "gray") + 
  # organism panels
  facet_wrap(~organism, ncol=2) + 
  # scales
  scale_x_continuous(expand = c(0,0), breaks = (0:5)*0.2) +
  scale_y_continuous(breaks = (0:10)*5) + 
  scale_shape_manual(values = 21:26) +
  coord_cartesian(xlim = c(-0.1, 0.9)) +
  # labels
  labs(x = TeX("$pN_{2}$ \\[bar\\]"), y = "distance (# cells)") +
  # theme
  theme_figure(grid = FALSE, legend = FALSE, text_size = 20) 

4 Enzyme kinetics interpretation

4.1 Load additional data

isotope_data_summary <- read_excel(file.path("data", "2018_Silverman_et_al-isotope_data_summary.xlsx"))
isotope_model <- read_excel(file.path("data", "2018_Silverman_et_al-isotope_model.xlsx"))
growth_rate_data <- read_excel(file.path("data", "2018_Silverman_et_al-growth_rate_data.xlsx"))

4.2 Calculate michaelis menten fit

Based on non-stationary phase data for Anabaena cylindrica.

mm_model <- 
  # combine all the information
  heterocyst_data_summary %>% 
  filter(organism == "A. cylindrica", growth_phase != "stationary") %>%  
  left_join(growth_rate_data, by = c("pN2", "organism")) %>% 
  left_join(isotope_data_summary, by = c("pN2", "organism")) %>% 
  left_join(isotope_model, by = "organism") %>% 
  # run model
  nest(-organism, -growth_phase) %>% 
  mutate(
    # bootstrap Km using nonlinear least squares fit to find Km
    data = map(data, na.omit),
    bootstrap = map(data, ~boot(data = .x, R = n_bootstrap,
      statistic = function (data, idx) {
        if (all(data[idx,]$pN2 == data[idx,]$pN2[1])) {
          return(NaN) # identical resamples, can't fit a model
        }
        
        # model fit for resampled data
        safe_nls <- safely(nls)
        m <- safe_nls(
          n_cbh_mean ~ pN2/growth_rate.day * 1/(Km.bar * eps_fix / (eps_aq_gas - eps_mean) + pN2) + 0, 
          data = data[idx,], start = list(Km.bar = 0.5), algorithm = "plinear")
        coefs <- tidy(m$result)
        
        # return value
        if (is.null(m$error)) {
          return(c(filter(coefs, term == "Km.bar")$estimate, filter(coefs, term == ".lin")$estimate))
        } else {
          return(c(NaN, NaN)) # nls did not converge
        }
      })),
    bootstrap_data = map(bootstrap, ~.x$t[!is.nan(.x$t[,1]), ]),
    n_converged = map_dbl(bootstrap_data, nrow),
    Km.bar = map_dbl(bootstrap_data, ~mean(.x[,1])),
    Km_se.bar = map_dbl(bootstrap_data, ~sd(.x[,1])),
    slope = map_dbl(bootstrap_data, ~mean(.x[,2]))
  ) 

# print result of the fit
mm_model %>% select(organism, growth_phase, n_converged, Km.bar, Km_se.bar)